In house data

Objective

  • In Chapter 03, we analysed YS data alone
  • Here we would like to integrate the AGM and YS data

Conclusion

Set-up

Load required library

library(scater)
library(Seurat)
library(patchwork)
library(ggplot2)
library(dplyr)
library(reshape2)
library(ggbeeswarm)
library(gghighlight)
library(ggrepel)

#library(DropletUtils)
#library(Matrix)
#library(schex)

# Scater
library(scater)
library(scran)
library(dendextend)
library(dynamicTreeCut)

# 3d plot
#library(egl)

Specify the work space

workDir <- getwd()
workDir <- gsub("/scripts", "", workDir)
dataDir <- file.path(workDir, "data")
outDir <- file.path(workDir, "output")

# out data
outData_dir <- file.path(outDir, "out_data")

outDir_cur <- file.path(outDir, "Chapter_04_InHouse_YS_AGM")
dir.create(outDir_cur)
## Warning in dir.create(outDir_cur): '/Users/zfadlullah/Dropbox (The University
## of Manchester)/zaki/PhD/sequencing_runs/2021/2021_06_01_MZ_016/output/
## Chapter_04_InHouse_YS_AGM' already exists
set.seed(123)

source(file.path(workDir, "functions.R"))

Load the integrated object

load(file=file.path(outData_dir, "ForIntegration_InHouse_Public.Rdata"))

We have read the original single cell exp object

se_next <- readRDS(file.path(outData_dir, "Se_Next_allGenes_ENSID.RDS"))
se_nova <- readRDS(file.path(outData_dir, "Se_Nova_allGenes_ENSID.RDS"))

Load the procesed YS data

se_YS <- readRDS(file.path(outData_dir, "se_YS_only_fullInfo.RDS"))

Load the AGM data from the blood paper

# Now we load the single cell experiment object
seObjDir <- "/Users/zfadlullah/Dropbox (The University of Manchester)/zaki/PhD/sequencing_runs/2019/191001_MZ_009/output/Chapter_04_E10.5"
load(file=file.path(seObjDir, "se.block_f.Rdata"))

Load INDEX sort data

# We know all cell sorted in this plate is Runx1 / Gfi1 reporter
df_inx_WN_S6 <- read.csv(file.path(dataDir, "INDEX_data/Metadata_WNs6_AGM060.csv")) %>%
  dplyr::rename(FACS_well = OriWell) %>%
  mutate(Plate = gsub("Plate", "", Plate),
         SC_project = 'WN-S6',
         UniqID = paste(SC_project, Plate, FACS_well, sep="_")) %>%
dplyr::select(UniqID, Population2, Runx1b_RFP, Gfi1_GFP) %>%
  mutate(Allele = "RG")

# plate WN S7
df_inx_WN_S7 <- read.csv(file.path(dataDir, "INDEX_data/Metadata_WNs7_WNs8_AGM068.csv")) %>%
  dplyr::rename(FACS_well = OriWell) %>%
  mutate(tmp = Plate,
         SC_project = gsub("_p[1-5]", "", tmp),
         SC_project = gsub("_", "-", SC_project),
         Plate = gsub(".*_p", "", tmp),
         UniqID = paste(SC_project, Plate, FACS_well, sep="_")) %>%
  dplyr::select(UniqID, Population2, Runx1b_RFP, Gfi1_GFP,FACS_well)

# We know which well is only the Gfi1 reporter (based on plate layout excel file)
Gfi_mouse_well <- c(
  paste0(rep("G", each = ),sprintf("%02d",5:12)),
  paste0(rep(LETTERS[8:15], each = 23),sprintf("%02d",1:12)),
  paste0(rep("P", each=3),sprintf("%02d",10:13)))

df_inx_WN_S7 <- df_inx_WN_S7 %>%
  mutate(Allele = if_else(FACS_well %in% Gfi_mouse_well, "G", "RG")) %>%
  dplyr::select(-FACS_well)

df_indx <- rbind(df_inx_WN_S6, df_inx_WN_S7)

Load list of CC genes

s.genes <- cc.genes.updated.2019$s.genes
g2m.genes <- cc.genes.updated.2019$g2m.genes

Read list of cell surface genes

df_cs <- read.delim(file.path(dataDir, "Others/RM_cur_CS_mouse.txt"), stringsAsFactors = FALSE)

Merging (combine two objects)

We tested the Seurat integration workflow and find that it potentially over-integrate the data. So we will just stick with simply merging the data

outDir_inHouse1 <- file.path(outDir_cur, "01_Scater_analysis_YS_AGM_allCells")
dir.create(outDir_inHouse1)
## Warning in dir.create(outDir_inHouse1): '/Users/zfadlullah/Dropbox (The
## University of Manchester)/zaki/PhD/sequencing_runs/2021/2021_06_01_MZ_016/
## output/Chapter_04_InHouse_YS_AGM/01_Scater_analysis_YS_AGM_allCells' already
## exists

We loaded the single cell object in the Set-up section. Simply merge the two objects

dat1 <- assay(se_next, "counts")
dat2 <- assay(se_nova, "counts")
dat <- cbind(dat1,dat2)

c.anno1 <- colData(se_next)
c.anno2 <- colData(se_nova)
c.anno <- rbind(c.anno1, c.anno2)

g.anno <- rowData(se_next)
colnames(g.anno)[5] <- "chr_name"

se <- 
  SingleCellExperiment(
  assays = list(counts = dat),
  colData = c.anno,
  rowData = g.anno)

se$condition <- factor(se$condition, levels=com_popFac)

We remove the Gfi1/1b KO as we are not interested in this

AnoSite <- se$condition
AnoSite <- gsub("_.*", "", AnoSite)
table(AnoSite)
## AnoSite
##  AGM  MES   YS 
##  702   94 1504
se$Site <- AnoSite


# Remove Gfi1/1b KO
se <- se[,!se$condition == "YS_E9.5_HE_Gfi1s_KO"]

Lets remove tech duplicates, we select the ones with the most genes to keep.

cD <- as.data.frame(colData(se))

cD <- cD %>%
  # Find the lane in which the sample was sequenced on
  mutate(Lane = gsub("*.L001", "L1", FullName),
         Lane = gsub(".*L002", "L2", Lane),
         Lane = gsub("GL.*", "L1", Lane),
         SeqBatch = paste(SeqRun, Lane, sep="_"),
  # Find the Uniq ID
         Plate_Well = paste(SC_project, SC_plate, FACS_well, sep="_"))

# Identify tech replicates
df_com_twice <- as_tibble(cD) %>%
  dplyr::count(Plate_Well) %>% dplyr::filter(n > 1)

qc_cell <- perCellQCMetrics(se)
cD <- cbind(cD, qc_cell)

cD_noTech <- cD %>%
  arrange(desc(detected)) %>%
  distinct(Plate_Well,.keep_all = TRUE)

# Subset
se <- se[,se$Barcode %in% cD_noTech$Barcode]

Filtering lowly-expressed genes

Remove all ENS with 0 counts

row_sub <- rowSums(dat)
row_sub <- names(row_sub)[row_sub > 0]

Convert to gene names

se <- se[row_sub, ]

new.row.names <- uniquifyFeatureNames(
    rowData(se)$ID,
    rowData(se)$Symbol
)

rownames(se) <- new.row.names
head(rownames(se), 5)
## [1] "Xkr4"    "Gm1992"  "Gm19938" "Gm37381" "Rp1"

Normalisation

set.seed(123)
clust.se <- quickCluster(se) 
se <- computeSumFactors(se, cluster=clust.se, min.mean=0.1)
se <- logNormCounts(se)

Feature selection

set.seed(123)
dec.se <- modelGeneVar(se)

# Visualizing the fit:
fit.se <- metadata(dec.se)
plot(fit.se$mean, fit.se$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")
curve(fit.se$trend(x), col="dodgerblue", add=TRUE, lwd=2)

Cell cycle assginment

mm.pairs <- readRDS(system.file("exdata", "mouse_cycle_markers.rds", 
    package="scran"))


# Using Ensembl IDs to match up with the annotation in 'mm.pairs'.
assignments <- cyclone(se, mm.pairs, gene.names=rowData(se)$ID)
se$Phase <- assignments$phases

saveRDS(se, file.path(outDir_cur, "seNorm_YS_AGM.RDS"))

Dim reduction

Select the most variable genes

set.seed(123)
top.prop <- getTopHVGs(dec.se, prop=0.1)
top.se <- getTopHVGs(dec.se, n=2000)
length(top.prop)
## [1] 978
length(top.se)
## [1] 2000
chosen <- top.se

Run dim reduction

se <- runPCA(se, subset_row=chosen) 

percent.var <- attr(reducedDim(se), "percentVar")
plot(percent.var, log="y", xlab="PC", ylab="Variance explained (%)")

UMAP

set.seed(123)
se <- runUMAP(se, dimred="PCA", subset_row=chosen,
              pca=30,
              n_neighbors=30,
              min_dist=0.5)
plotUMAP(se, colour_by="condition")

Clustering

We use graph-based for quick cluster

# Graph based

g <- buildSNNGraph(se, k=10, use.dimred = 'PCA')
clust <- igraph::cluster_walktrap(g)$membership
se$GraphBased <- paste0("g_", sprintf("%02d",clust))

Visualise

Visualise UMAP better

umap.df.int <- reducedDim(se, "UMAP") %>%
  cbind(colData(se)) %>%
  as.data.frame() %>%
  dplyr::rename(UMAP_1 = V1,
                UMAP_2 = V2)

Plot

p1 <- 
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=SeqRun)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~SeqRun) +
  theme_custom +
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the Sequencing batch")

p2 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=SeqRun)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme_custom +
  theme(legend.position = "none") +
  facet_wrap(~condition) +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the FACS population")

p3 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=GraphBased)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme_custom +
  theme(legend.position = "right") +
  facet_wrap(~GraphBased) +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the in-silico clusters")

p4 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=Phase)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~Phase) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the cell cycle")


png(file.path(outDir_inHouse1, "facet_SeqBatch.png"), height=350, width = 850)
p1
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse1, "facet_FACS.png"), height=950, width = 1200)
p2
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse1, "facet_cluster.png"), height=950, width = 1200)
p3
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse1, "facet_CC.png"), height=350, width = 850)
p4
dev.off()
## quartz_off_screen 
##                 2

We can check the expression of the following genes

plotUMAP(se, colour_by=c("Folr1"))

plotUMAP(se, colour_by=c("Acta2"))

plotUMAP(se, colour_by=c("Ptn"))

plotUMAP(se, colour_by=c("Hbb-y"))

seOri <- se

Save the se object at this stage

saveRDS(seOri, file.path(outDir_inHouse1, "se_allCell.RDS"))
#seOri <- readRDS(file.path(outDir_inHouse1, "se_allCell.RDS"))

Subset data

outDir_inHouse2 <- file.path(outDir_cur, "02_Scater_analysis_YS_AGM_subset")
dir.create(outDir_inHouse2)
## Warning in dir.create(outDir_inHouse2): '/Users/zfadlullah/Dropbox (The
## University of Manchester)/zaki/PhD/sequencing_runs/2021/2021_06_01_MZ_016/
## output/Chapter_04_InHouse_YS_AGM/02_Scater_analysis_YS_AGM_subset' already
## exists

We now subset to the population of interest

We first remove the MES population

umap.df.int <- reducedDim(seOri, "UMAP") %>%
  cbind(colData(seOri)) %>%
  as.data.frame() %>%
  dplyr::rename(UMAP_1 = V1,
                UMAP_2 = V2)


umap.df.int_sub <- umap.df.int %>%
  dplyr::filter(UMAP_1 > -5) %>%
  dplyr::filter(!condition %in% c("MES_E10.5_Runx1"))

Now remove the Hbb cells

umap.df.int_sub <- umap.df.int_sub %>%
  dplyr::filter(!GraphBased %in% c("g_03", "g_17", "g_16"))

Also remove cells from the ACE sorted experiment

umap.df.int_sub <- umap.df.int_sub %>%
  mutate(TempCat = paste(condition, SeqRun, sep="_")) %>%
  dplyr::filter(!TempCat == "AGM_E10.5_HE_Gfi1_GL69") %>%
  #dplyr::filter(!TempCat == "AGM_E10.5_HE_Runx1_GL69") %>%
  dplyr::filter(!TempCat == "AGM_E10.5_ACE_pos_GL69")

Exclude the YS Gfi1 pos Kit Neg population

umap.df.int_sub <- umap.df.int_sub %>%
  dplyr::filter(!condition %in% c("YS_E9.0_HE_Gfi1_Kneg", "YS_E9.5_HE_Gfi1_Kneg", "YS_E10_HE_Gfi1_Kneg"))

Remove Folr1 expressing cells

plot(assay(seOri, "logcounts")["Folr1",])

# Make a cut off of ~7
#Folr1_high <- seOri$Barcode[assay(seOri, "logcounts")["Folr1",] > 6.5]
Folr1_high <- seOri$Barcode[assay(seOri, "logcounts")["Folr1",] > 2]

umap.df.int_sub <- umap.df.int_sub %>%
  dplyr::filter(!Barcode %in% Folr1_high)

For the YS data, we previously identified which cell to discard. We read in the file to ensure these cells are discarded

toRemove <- readRDS(file.path(outData_dir, "Cells_to_exclude.csv"))


umap.df.int_sub <- umap.df.int_sub %>%
  dplyr::filter(!Barcode %in% toRemove)

Show which cells were selected

umap.df.int <- umap.df.int %>%
  dplyr::mutate(isSel = if_else(Barcode %in% umap.df.int_sub$Barcode, "Retain", "Discard"))


p1 <- 
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=isSel)) +
  geom_point() +
  theme(legend.position = "right") +
  theme_custom +
  NULL

p1

png(file.path(outDir_inHouse1, "Selected_cells.png"), height=350, width = 450)
p1
dev.off()
## quartz_off_screen 
##                 2

There is a cluster of cells in the region of where most of the cells were removed. Not sure what this cells are. It does not belong to a specific clsuter. It seem to express high lvls of Mito

# Manually remove them
man_remove <- umap.df.int_sub %>%
  dplyr::filter(GraphBased == "g_04" & UMAP_1 <=-1)  %>%
  pull(Barcode)
  
umap.df.int_sub <- umap.df.int_sub %>%
  dplyr::filter(!Barcode %in% man_remove)

  
umap.df.int <- umap.df.int %>%
  dplyr::mutate(isSel = if_else(Barcode %in% umap.df.int_sub$Barcode, "Retain", "Discard"))


p1 <- 
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=isSel)) +
  geom_point() +
  theme(legend.position = "right") +
  theme_custom +
  NULL

p1

png(file.path(outDir_inHouse1, "Selected_cells.png"), height=350, width = 450)
p1
dev.off()
## quartz_off_screen 
##                 2

Subset and Re-do all the steps

se <- seOri[,seOri$Barcode %in% umap.df.int_sub$Barcode]

set.seed(123)
dec.se <- modelGeneVar(se)

# Visualizing the fit:
fit.se <- metadata(dec.se)
plot(fit.se$mean, fit.se$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression")
curve(fit.se$trend(x), col="dodgerblue", add=TRUE, lwd=2)

top.se <- getTopHVGs(dec.se, n=2500)
chosen <- top.se

se <- runPCA(se, subset_row=chosen) 
se <- runUMAP(se, dimred="PCA", subset_row=chosen,
              pca=30,
              n_neighbors=30,
              min_dist=0.3)
plotUMAP(se, colour_by="condition")

UMAP when CC is removed

diff <- getVarianceExplained(se, "Phase")
discard <- diff > 5

top.hvgs2 <- getTopHVGs(se[which(!discard),], n=2500)
se.nocycle <- runPCA(se, subset_row=top.hvgs2)

se.nocycle <- runUMAP(se.nocycle, dimred="PCA", subset_row=top.hvgs2,
              pca=30,
              n_neighbors=30,
              min_dist=0.3)
plotUMAP(se.nocycle, colour_by="condition")

# Re-run clusters

g <- buildSNNGraph(se.nocycle, k=10, use.dimred = 'PCA')
clust <- igraph::cluster_walktrap(g)$membership
se.nocycle$GraphBased <- paste0("g_", sprintf("%02d",clust))

Save either the UMAP with CC removed or not

saveRDS(se, file=file.path(outData_dir, "se_YS_AGM.RDS"))
#se <- readRDS(file.path(outData_dir, "se_YS_AGM.RDS"))

Visualise

Visualise UMAP better

umap.df.int <- reducedDim(se, "UMAP") %>%
  cbind(colData(se)) %>%
  as.data.frame() %>%
  dplyr::rename(UMAP_1 = V1,
                UMAP_2 = V2)

Plot

p1 <- 
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=SeqRun)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~SeqRun) +
  theme_custom +
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the Sequencing batch")

p2 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=SeqRun)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~condition) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the FACS population")

p3 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=GraphBased)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~GraphBased) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the in-silico clusters")

p4 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=Phase)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~Phase) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the cell cycle")


png(file.path(outDir_inHouse2, "facet_SeqBatch.png"), height=350, width = 850)
p1
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse2, "facet_FACS.png"), height=950, width = 1200)
p2
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse2, "facet_cluster.png"), height=950, width = 1200)
p3
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse2, "facet_CC.png"), height=350, width = 850)
p4
dev.off()
## quartz_off_screen 
##                 2

Overlay the previously lablled YS data

cD_YS <- as.data.frame(colData(se_YS)) %>%
  dplyr::select(Barcode, GraphBased, Cond_fac) %>%
  dplyr::rename(GraphBased_YS = GraphBased)

umap.df.int2 <- umap.df.int %>%
  dplyr::left_join(cD_YS) %>%
  dplyr::mutate(UniqID = paste(SC_project, SC_plate, FACS_well, sep="_"))
## Joining, by = "Barcode"
p1 <-        
  ggplot(umap.df.int2, aes(x=UMAP_1, y=UMAP_2, colour=GraphBased_YS)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~GraphBased_YS) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the in-silico clusters")




png(file.path(outDir_inHouse2, "facet_cluster_YS_AGM_highlight_YS.png"), height=550, width = 950)
p1
dev.off()
## quartz_off_screen 
##                 2

Overlay the AGM data

cD_AGM <- as.data.frame(colData(se.block_f)) %>%
  dplyr::mutate(
    tmp1 = gsub('([a-zA-Z])', "", FACS_well),
    tmp1 = sprintf("%02d", as.numeric(tmp1)),
    tmp2 = gsub('[0-9]', "", FACS_well),
    FACS_well = paste0(tmp2, tmp1),
    UniqID = paste(SC_project, SC_plate, FACS_well, sep="_")) %>%
  dplyr::select(UniqID, finalCluster2)
  
umap.df.int2 <- umap.df.int2 %>%
  dplyr::left_join(cD_AGM) %>%
  mutate(finalCluster2 = as.character(finalCluster2))
## Joining, by = "UniqID"
p1 <-        
  ggplot(umap.df.int2, aes(x=UMAP_1, y=UMAP_2, colour=finalCluster2)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~finalCluster2) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the in-silico clusters")


png(file.path(outDir_inHouse2, "facet_cluster_YS_AGM_highlight_AGM.png"), height=550, width = 950)
p1
dev.off()
## quartz_off_screen 
##                 2

Save the metaData

write.csv(umap.df.int2, file.path(outDir_inHouse2, "Metadata_YS_AGM.csv"))

Seurat

outDir_inHouse3 <- file.path(outDir_cur, "03_Seurat_analysis_YS_AGM_subset")
dir.create(outDir_inHouse3)
## Warning in dir.create(outDir_inHouse3): '/Users/zfadlullah/Dropbox (The
## University of Manchester)/zaki/PhD/sequencing_runs/2021/2021_06_01_MZ_016/
## output/Chapter_04_InHouse_YS_AGM/03_Seurat_analysis_YS_AGM_subset' already
## exists

Lets try use seurat to analyse the data

inhouse.combined <- merge(seuset_nova, y = seuset_next, add.cell.ids = c("nova", "next"), project = "inhouse")

Do normal processing

#se <- se.nocycle
seuset <- inhouse.combined
seuset <- seuset[,seuset$Barcode %in% se$Barcode ]

# Match the order
dd <- seuset[[]]
cD <- as.data.frame(colData(se)) %>%
  dplyr::select(Barcode, GraphBased )

dd <- dd %>%
  dplyr::left_join(cD)
## Joining, by = "Barcode"
seuset$GraphBased <- factor(dd$GraphBased, levels=paste0("g_", sprintf("%02d",1:length(unique(dd$GraphBased)))))

Do normal processing

# Feature selections
seuset <- NormalizeData(seuset)
seuset <- 
  FindVariableFeatures(object = seuset, selection.method = "vst", nfeatures = 2500)
# Scaling the data
all.genes <- rownames(x = seuset)
seuset <- CellCycleScoring(seuset, s.features = s.genes, g2m.features = g2m.genes, set.ident = FALSE)
## Warning: The following features are not present in the object: MCM5, PCNA,
## TYMS, FEN1, MCM7, MCM4, RRM1, UNG, GINS2, MCM6, CDCA7, DTL, PRIM1, UHRF1, CENPU,
## HELLS, RFC2, POLR1B, NASP, RAD51AP1, GMNN, WDR76, SLBP, CCNE2, UBR7, POLD3,
## MSH2, ATAD2, RAD51, RRM2, CDC45, CDC6, EXO1, TIPIN, DSCC1, BLM, CASP8AP2, USP1,
## CLSPN, POLA1, CHAF1B, MRPL36, E2F8, not searching for symbol synonyms
## Warning: The following features are not present in the object: HMGB2, CDK1,
## NUSAP1, UBE2C, BIRC5, TPX2, TOP2A, NDC80, CKS2, NUF2, CKS1B, MKI67, TMPO, CENPF,
## TACC3, PIMREG, SMC4, CCNB2, CKAP2L, CKAP2, AURKB, BUB1, KIF11, ANP32E, TUBB4B,
## GTSE1, KIF20B, HJURP, CDCA3, JPT1, CDC20, TTK, CDC25C, KIF2C, RANGAP1, NCAPD2,
## DLGAP5, CDCA2, CDCA8, ECT2, KIF23, HMMR, AURKA, PSRC1, ANLN, LBR, CKAP5, CENPE,
## CTCF, NEK2, G2E3, GAS2L3, CBX5, CENPA, not searching for symbol synonyms
## Warning in AddModuleScore(object = object, features = features, name = name, :
## Could not find enough features in the object from the following feature lists:
## S.Score Attempting to match case...Could not find enough features in the object
## from the following feature lists: G2M.Score Attempting to match case...
# Add either originated from nova or next
Seq_name <- colnames(seuset)
Seq_name <- gsub("_.*", "", Seq_name)
seuset$Sequencer <- Seq_name

# Dont need to regress anything at this stage as we only want to remove the tech rep and outlier
seuset <- ScaleData(seuset, 
                    #vars.to.regress = c("S.Score", "G2M.Score", "Sequencer"), 
                    features = rownames(seuset))
## Centering and scaling data matrix
seuset <- RunPCA(seuset, verbose = FALSE)
seuset <- RunUMAP(seuset, dims = 1:30, verbose = FALSE)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
seuset <- FindNeighbors(seuset, dims = 1:30, verbose = FALSE)
seuset <- FindClusters(seuset, verbose = FALSE)

Plot Seurat output

DimPlot(seuset)

FeaturePlot(seuset, c("Ace", "Lyve1"))

Visualise

Visualise UMAP better

umap.df.int <- as.data.frame(Embeddings(seuset, reduction="umap"))  %>%
  cbind(seuset[[]]) %>%
  as.data.frame() %>%
  mutate(condition = factor(condition, levels=com_popFac))

Plot

p1 <- 
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=SeqRun)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~SeqRun) +
  theme_custom +
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the Sequencing batch")

p2 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=SeqRun)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~condition) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the FACS population")

p3 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=seurat_clusters)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~seurat_clusters) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the in-silico clusters")

p4 <-        
  ggplot(umap.df.int, aes(x=UMAP_1, y=UMAP_2, colour=Phase)) +
  geom_point() +
  gghighlight(use_direct_label = FALSE) +
  theme(legend.position = "right") +
  facet_wrap(~Phase) +
  theme_custom +
  #scale_colour_manual(values=g_hue) + 
  #scale_colour_manual(values=tableau20) + 
  labs(title="Highlighting the cell cycle")


png(file.path(outDir_inHouse3, "facet_SeqBatch.png"), height=350, width = 850)
p1
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse3, "facet_FACS.png"), height=950, width = 1200)
p2
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse3, "facet_cluster.png"), height=950, width = 1200)
p3
dev.off()
## quartz_off_screen 
##                 2
png(file.path(outDir_inHouse3, "facet_CC.png"), height=350, width = 850)
p4
dev.off()
## quartz_off_screen 
##                 2

Paga preparation

If we are starting from single cell experiment object we convert the single cell experiment to seurat

seu <- Seurat::as.Seurat(se)
## Warning: The column names of the counts matrix is NULL. Setting cell names to
## cell_columnidx (e.g 'cell_1').
## Warning: The column names of the data matrix is NULL. Setting cell names to
## cell_columnidx (e.g 'cell_1').
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')

## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from PC to PC_
## Warning: All keys should be one or more alphanumeric characters followed by an
## underscore '_', setting key to PC_

We need to clean up the Seurat object a little bit. We can not have any NA vales in columns and need to remove the ‘graph’ object

#seu <- seuset
# Clean up the column annotaion
dd <- seu[[]]
dd2 <- dd %>%
  dplyr::select(condition, GraphBased) %>%
  dplyr::rename(cluster = GraphBased)
  #dplyr::select(cluster) %>%
  #mutate(tmpID = "test")

seu@meta.data <- dd2
# Remove the graph # https://github.com/theislab/scanpy/issues/598
seu@graphs <- list()

Create a new seurat object

kk1 <- GetAssayData(object = seu, slot = "counts")

kk_name <- gsub("nova_", "", colnames(kk1))
kk_name <- gsub("next_", "", kk_name)

colnames(kk1) <- kk_name

kk2 <- seu[[]]
rownames(kk2) <- kk_name

seu <- 
  Seurat::CreateSeuratObject(counts = kk1, 
                     project = "InHouse", 
                     meta.data = kk2,
                     min.cells = 3, min.features = 200)
seu <- NormalizeData(
  object = seu, 
  normalization.method = "LogNormalize", 
  scale.factor = 10000
)
seu@graphs <- list()

seu <- 
  FindVariableFeatures(object = seu, selection.method = "vst", nfeatures = 2500)
# Scaling the data
all.genes <- rownames(x = seu)
seu <- ScaleData(object = seu, features = all.genes)
## Centering and scaling data matrix

Save the object as RDS

saveRDS(seu, file.path(outData_dir, "seu_inHouse_YS_AGM_forLoom.RDS"))
  1. Transfer the RDS file to the cluster. Read the R object in the R Studio version on the cluster
# Transfer
# scp seu_inHouse_noTechRep_forLoom.RDS zfadlullah@192.168.203.4

# Using the R session 4.0.3
# Seurat version 3.9.9.9010
# Need to load hdf5 http://scicom.picr.man.ac.uk/issues/6046#change-12123

dyn.load('/lmod/libs/hdf5/1.10.4/lib/libhdf5_hl.so.100')
library(hdf5r)
library(Seurat)
library(hdf5r)
library(loomR)
library(dplyr)

seu <- readRDS("seu_inHouse_forLoom.RDS")
as.loom(seu, filename = "seu_inHouse.loom")
  1. Transfer back to seu.loom file from the cluster to our local machine. Now we run the juypter notebook.
# scp zfadlullah@192.168.203.4:/home/zfadlullah/seu.loom ~/Desktop/PAGA_inHouse_merge

Others

We can also do a few other things

Using DE genes

For UMAP and hiearchical clustering

# Find unique genes
colLabels(se) <- se$condition

# We can change the pval.type="all" to be more stringent
markers.se <- findMarkers(se, pval.type="some", direction="up")
## Warning in FUN(...): no within-block comparison between AGM_E10.5_ACE_pos and
## AGM_E10.5_Endo
## Warning in FUN(...): no within-block comparison between AGM_E10.5_HE_Runx1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between AGM_E10.5_HE_Gfi1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between AGM_E10.5_EHT and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between AGM_E10.5_IAHC and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between MES_E10.5_Runx1 and
## AGM_E10.5_Endo
## Warning in FUN(...): no within-block comparison between MES_E10.5_Runx1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between MES_E10.5_Runx1 and
## AGM_E10.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between MES_E10.5_Runx1 and
## AGM_E10.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between MES_E10.5_Runx1 and
## AGM_E10.5_EHT
## Warning in FUN(...): no within-block comparison between MES_E10.5_Runx1 and
## AGM_E10.5_IAHC
## Warning in FUN(...): no within-block comparison between YS_E9.0_Endo and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.0_Endo and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_Endo and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_Endo and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10.5_Endo and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E10.5_Endo and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## AGM_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## AGM_E10.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## AGM_E10.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## AGM_E10.5_EHT
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## AGM_E10.5_IAHC
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## YS_E9.0_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## YS_E9.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1_Kneg and
## YS_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## AGM_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## AGM_E10.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## AGM_E10.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## AGM_E10.5_EHT
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## AGM_E10.5_IAHC
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## YS_E9.0_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## YS_E9.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## YS_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1_Kneg and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## AGM_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## AGM_E10.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## AGM_E10.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## AGM_E10.5_EHT
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## AGM_E10.5_IAHC
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## YS_E9.0_Endo
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## YS_E9.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## YS_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10_HE_Gfi1_Kneg and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Runx1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Runx1 and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Runx1 and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Runx1 and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Runx1 and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Runx1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Runx1 and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Runx1 and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Runx1 and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Runx1 and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Runx1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Runx1 and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Runx1 and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Runx1 and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Runx1 and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1 and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1 and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1 and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.0_HE_Gfi1 and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1 and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1 and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1 and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1 and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Gfi1 and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Gfi1 and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Gfi1 and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Gfi1 and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_HE_Gfi1 and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_EMP and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_EMP and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_EMP and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_EMP and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_EMP and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_EMP and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E10.5_EMP and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10.5_EMP and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_EMP and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_EMP and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_LMP and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_LMP and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_LMP and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_LMP and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_LMP and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_LMP and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E10.5_LMP and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E10.5_LMP and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_LMP and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E10.5_LMP and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## AGM_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## AGM_E10.5_ACE_pos
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## AGM_E10.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## AGM_E10.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## AGM_E10.5_EHT
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## AGM_E10.5_IAHC
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## MES_E10.5_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.0_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E10.5_Endo
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.0_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.5_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E10_HE_Gfi1_Kneg
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.0_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E10.5_HE_Runx1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.0_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E10.5_HE_Gfi1
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.5_EMP
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E10.5_EMP
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E9.5_LMP
## Warning in FUN(...): no within-block comparison between YS_E9.5_HE_Gfi1s_KO and
## YS_E10.5_LMP
# For each cluster 
#intClus <- c(paste0("l_", sprintf("%02d", 1:14)))
intClus <- unique(se$condition)


# Write function
topGene <- function(markers.se, chosen_clus){
  interesting <- markers.se[[chosen_clus]]
  best.set <- as.data.frame(interesting) %>%
    tibble::rownames_to_column(var = "Gene") %>%
    dplyr::filter(summary.logFC >= 1) %>%
    dplyr::filter(p.value <=0.05) %>%
    dplyr::mutate(group = chosen_clus) %>%
    dplyr::select(Gene, summary.logFC, p.value, FDR, group)
  
  return(best.set)
}

# Repeat for each clusters
df_list <- list()

for (i in seq_along(intClus)){
  df <- topGene(markers.se, intClus[i])
  df_list[[i]] <- df
}

# Make into one big df
df_de <- do.call(rbind, df_list)

# Filter
df_de_sub <- df_de %>%
  dplyr::filter(FDR < 0.05) %>%
  dplyr::filter(summary.logFC >= 2)

#dplyr::filter(df_de_sub, Gene == "Runx1")

UniqGenes <- unique(df_de_sub$Gene)
length(UniqGenes)
## [1] 2761
chosen <- UniqGenes

Find the UMAP

# ------------------ UMAP ---------------- #
# Using the Top DEG
# Calculate PCA
exp.sub <- assay(se2, "logcounts")[chosen,]
exp.sub <- t(exp.sub)
pca <- prcomp(exp.sub,scale=TRUE)
numPCA <- 1:30
df_PCA <- as.data.frame(pca$x)[,numPCA]


# UMAP parameter
n_neig <- c(16, 32, 64, 128)
min_di <- c(0.01, 0.05, 0.1, 0.5, 0.8, 1)
ran_state <- as.integer(999)
n_dim <- 2

colp <- df.col_FACS$Pal
cond_ID <- se2$condition

# Run the UMAP 
min_di %>% 
  map_df(~umap(as.matrix(df_PCA), min_dist = .x,
               n_components = n_dim,
               random_state = ran_state)$layout %>% 
           as.data.frame() %>%
           dplyr::rename(UMAP1 = V1, UMAP2=V2) %>%
           dplyr::mutate(condition = cond_ID, Distance = .x)) %>% 
  ggplot(aes(UMAP1, UMAP2, color = condition)) + 
  geom_point(size=1) +
  theme_bw() +
  scale_colour_manual(values=colp) +
  facet_wrap(~Distance, scales = "free")

# 0.5 looks good

# Run the UMAP
n_neig %>% 
  map_df(~umap(as.matrix(df_PCA), n_neighbors = .x,
               #min_dist = 0.1,
               n_components = n_dim,
               random_state = ran_state)$layout %>% 
           as.data.frame() %>%
           dplyr::rename(UMAP1 = V1, UMAP2=V2) %>%
           dplyr::mutate(condition = cond_ID, Neighbor = .x)) %>% 
  ggplot(aes(UMAP1, UMAP2, color = condition)) + 
  geom_point(size=1) +
  theme_bw() +
  scale_colour_manual(values=colp) +
  facet_wrap(~Neighbor, scales = "free")



# Parameters
n_neig <- 64
min_di <- 0.5
n_dim <- 2

set.seed(123)
umap.mat <- uwot::umap(df_PCA,ret_model = TRUE,
                       n_neighbors=n_neig,
                       min_dist=min_di,
                       n_components = n_dim)
umap.df <- as.data.frame(umap.mat$embedding)
#colnames(umap.df) <- c("V1", "V2", "V3")
colnames(umap.df) <- c("V1", "V2")

umap.df <- umap.df %>%
  dplyr::mutate(condition = se2$condition,
                cluster = se2$TmpClust)

ggplot(umap.df, aes(x=V1, y=V2,colour=condition)) +
  geom_point() +
  gghighlight() +
  facet_wrap(~condition) +
  theme_bw() +
  scale_colour_manual(values=colp)


ggplot(umap.df, aes(x=V1, y=V2,colour=cluster)) +
  geom_point() +
  gghighlight() +
  facet_wrap(~cluster) +
  theme_bw() 

3D plot

# 3D plot
x1 <- umap.df$V1
x2 <- umap.df$V2
x3 <- umap.df$V3

df_pal <- df.col_FACS %>%
  dplyr::filter(FACS %in% umap.df$condition)
pal <- df_pal$Pal

kk <- umap.df$condition
kk.1 <- unique(umap.df$condition)
kk <- factor(as.character(kk), 
                levels=c(
                  "AGM_E10.5_Endo", "AGM_E10.5_HE_Runx1", "AGM_E10.5_HE_Gfi1", "AGM_E10.5_EHT", "AGM_E10.5_IAHC",
                
                "YS_E9.0_Endo", "YS_E9.5_Endo", "YS_E10.5_Endo", 
                "YS_E9.0_HE_Gfi1_Kneg", "YS_E9.5_HE_Gfi1_Kneg", "YS_E10_HE_Gfi1_Kneg",
                
                "YS_E9.0_HE_Runx1", "YS_E9.5_HE_Runx1", "YS_E10.5_HE_Runx1", 
                "YS_E9.0_HE_Gfi1", "YS_E9.5_HE_Gfi1", "YS_E10.5_HE_Gfi1",
                
                "YS_E9.5_EMP","YS_E10.5_EMP",
                "YS_E9.5_LMP", "YS_E10.5_LMP"))
kk[is.na(pal[kk])] %>% unique()

pal <- pal[kk]

plot3d(x1, x2, x3, col = pal)


# Or use plotly
plot_ly(umap.df, x= ~V1, y= ~V2, z= ~V3, color =  ~Cluster)

Hierarchcial clustering

# ------------------ HC ---------------- #
chosen.exprs <- assay(se2, "logcounts")[chosen,]
my.dist = dist(as.matrix(t(chosen.exprs)), method = "euclidean")
my.tree <- hclust(my.dist, method="ward.D2")
my.dend <- as.dendrogram(my.tree, hang=0.1)
my.clusters <- unname(cutreeDynamic(my.tree, distM=as.matrix(my.dist), verbose=0, minClusterSize=10, deepSplit=2))
#my.clusters <- paste0("k_", my.clusters)
length(unique(my.clusters))
se2$TmpClust <- paste0("k_", sprintf("%02d",my.clusters))


plotUMAP(se2, colour="TmpClust")
table(se2$condition, se2$TmpClust)

# Making a prettier dendrogram.
my.tree$labels <- seq_along(my.tree$labels)
#dend <- as.dendrogram(my.tree, hang=0.1)
numClus <- length(unique(my.clusters))
coltree <- gg_color_hue(n=numClus)

my.tree_or <- my.clusters[order.dendrogram(my.dend)]

# Setting up colours for the FACS population
cFACS <- coltree[as.factor(se$TreeCut)]
cFACS <- coltree[as.factor(se$TreeCut)[order.dendrogram(my.dend)]]

no0_unique <- 
  function(x) {
    u_x <- unique(x)
    u_x[u_x != 0]
  }

clusters_numbers <- no0_unique(my.tree_or)
n_clusters <- length(clusters_numbers)
cols <- gg_color_hue(n=n_clusters)
dend2 <- branches_attr_by_clusters(my.dend, my.tree_or, values = cols) %>%
  set("labels_col", "black")
plot(dend2)
colored_bars(cFACS, dend2, sort_by_labels_order = FALSE, y_shift=10)

UniqClus <- unique(my.tree_or)

pdf(file.path(outDir_noLMP, "Dend.pdf"),height = 4, width = 12)
plot(dend2)
colored_bars(cFACS, dend2, sort_by_labels_order = FALSE, y_shift=10)

plot(UniqClus, ylim = c(-1, length(UniqClus)))
text(x =1:length(UniqClus), y=-1, UniqClus)

dev.off()

se$TreeCut <- paste0("k_", sprintf("%02d",my.clusters))

Session Info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] dynamicTreeCut_1.63-1       dendextend_1.15.1          
##  [3] scran_1.20.1                ggrepel_0.9.1              
##  [5] gghighlight_0.3.2           ggbeeswarm_0.6.0           
##  [7] reshape2_1.4.4              dplyr_1.0.6                
##  [9] patchwork_1.1.1             SeuratObject_4.0.4         
## [11] Seurat_4.0.6                scater_1.20.1              
## [13] ggplot2_3.3.4               scuttle_1.2.0              
## [15] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [17] Biobase_2.52.0              GenomicRanges_1.44.0       
## [19] GenomeInfoDb_1.28.0         IRanges_2.26.0             
## [21] S4Vectors_0.30.0            BiocGenerics_0.38.0        
## [23] MatrixGenerics_1.4.0        matrixStats_0.59.0         
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.6                igraph_1.2.6             
##   [3] lazyeval_0.2.2            splines_4.1.0            
##   [5] BiocParallel_1.26.0       listenv_0.8.0            
##   [7] scattermore_0.7           digest_0.6.27            
##   [9] htmltools_0.5.1.1         viridis_0.6.1            
##  [11] fansi_0.5.0               magrittr_2.0.1           
##  [13] ScaledMatrix_1.0.0        tensor_1.5               
##  [15] cluster_2.1.2             ROCR_1.0-11              
##  [17] limma_3.48.0              globals_0.14.0           
##  [19] spatstat.sparse_2.0-0     colorspace_2.0-1         
##  [21] xfun_0.24                 crayon_1.4.1             
##  [23] RCurl_1.98-1.3            jsonlite_1.7.2           
##  [25] spatstat.data_2.1-0       survival_3.2-11          
##  [27] zoo_1.8-9                 glue_1.4.2               
##  [29] polyclip_1.10-0           gtable_0.3.0             
##  [31] zlibbioc_1.38.0           XVector_0.32.0           
##  [33] leiden_0.3.8              DelayedArray_0.18.0      
##  [35] BiocSingular_1.8.1        future.apply_1.7.0       
##  [37] abind_1.4-5               scales_1.1.1             
##  [39] edgeR_3.34.0              DBI_1.1.1                
##  [41] miniUI_0.1.1.1            Rcpp_1.0.7               
##  [43] viridisLite_0.4.0         xtable_1.8-4             
##  [45] dqrng_0.3.0               reticulate_1.20          
##  [47] spatstat.core_2.2-0       rsvd_1.0.5               
##  [49] metapod_1.0.0             htmlwidgets_1.5.3        
##  [51] httr_1.4.2                FNN_1.1.3                
##  [53] RColorBrewer_1.1-2        ellipsis_0.3.2           
##  [55] ica_1.0-2                 farver_2.1.0             
##  [57] pkgconfig_2.0.3           uwot_0.1.10              
##  [59] sass_0.4.0                deldir_0.2-10            
##  [61] locfit_1.5-9.4            utf8_1.2.1               
##  [63] labeling_0.4.2            tidyselect_1.1.1         
##  [65] rlang_0.4.11              later_1.2.0              
##  [67] munsell_0.5.0             tools_4.1.0              
##  [69] generics_0.1.0            ggridges_0.5.3           
##  [71] evaluate_0.14             stringr_1.4.0            
##  [73] fastmap_1.1.0             goftest_1.2-2            
##  [75] knitr_1.33                fitdistrplus_1.1-5       
##  [77] purrr_0.3.4               RANN_2.6.1               
##  [79] nlme_3.1-152              pbapply_1.4-3            
##  [81] future_1.21.0             sparseMatrixStats_1.4.0  
##  [83] mime_0.10                 compiler_4.1.0           
##  [85] beeswarm_0.4.0            plotly_4.9.4.1           
##  [87] png_0.1-7                 spatstat.utils_2.2-0     
##  [89] statmod_1.4.36            tibble_3.1.2             
##  [91] bslib_0.2.5.1             stringi_1.6.2            
##  [93] highr_0.9                 RSpectra_0.16-0          
##  [95] bluster_1.2.1             lattice_0.20-44          
##  [97] Matrix_1.3-3              vctrs_0.3.8              
##  [99] pillar_1.6.1              lifecycle_1.0.0          
## [101] spatstat.geom_2.2-0       lmtest_0.9-38            
## [103] jquerylib_0.1.4           RcppAnnoy_0.0.18         
## [105] BiocNeighbors_1.10.0      data.table_1.14.0        
## [107] cowplot_1.1.1             bitops_1.0-7             
## [109] irlba_2.3.3               httpuv_1.6.1             
## [111] R6_2.5.1                  promises_1.2.0.1         
## [113] KernSmooth_2.23-20        gridExtra_2.3            
## [115] vipor_0.4.5               parallelly_1.26.0        
## [117] codetools_0.2-18          MASS_7.3-54              
## [119] assertthat_0.2.1          withr_2.4.2              
## [121] sctransform_0.3.2         GenomeInfoDbData_1.2.6   
## [123] mgcv_1.8-35               rpart_4.1-15             
## [125] grid_4.1.0                beachmat_2.8.0           
## [127] tidyr_1.1.3               rmarkdown_2.9            
## [129] DelayedMatrixStats_1.14.0 Rtsne_0.15               
## [131] shiny_1.6.0